/*
Event study estimates split by entrepreneur ability quartile.
Ability is measured as an owner fixed effect from a pre-birth
production function regression. Produces results for Figures 2 and A2.
*/

* Session setup: empty the data in memory, raise the variable limit, move to
* the project root, and run the project's program files.
clear
set maxvar 30000

cd "$root"

* run each project .ado file so the programs it defines are available below
foreach src in stack_weight_panel trim_impute event_study {
    do "$root/code/admin/ado/`src'.ado"
}

* output folder for the CSVs written by the event-study loop
global figpath "$root/estimates/admin/fe_quartiles"

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final.dta, clear

* Production-function regression used to measure ability.
* Sample: years strictly before cohort_id, plus every year of units with
* cohort_id == 0. Slopes on log workers and log assets vary by ind_cat2.
gen log_rev = log(revenues)
gen log_workers = log(n_workers_total)
gen log_assets = log(total_assets)

reghdfe log_rev ind_cat2#c.log_workers ind_cat2#c.log_assets ///
    if (year < cohort_id | cohort_id == 0), ///
    a(ind_cat2#firm_age age ind_cat2#year) res
    * one change: removed ind_cat2#edlevel
predict resid, residuals

* Running mean of the residual within (lnr, lfirm), accumulated in year order.
* The (year) ordering is declared explicitly so the result does not depend on
* an earlier sort surviving the commands in between. sum() treats a missing
* residual as zero, so rows without a residual carry the last mean forward.
bys lnr lfirm (year) : gen resid_sum = sum(resid)
bys lnr lfirm (year) : gen resid_N = sum(resid != .)
gen owner_fe = resid_sum / resid_N

* Cut points: 10th/30th/50th/70th/90th centiles of owner_fe among rows with
* male == 0 observed at year == cohort_id - 1.
centile owner_fe if year == cohort_id - 1 & male == 0, ///
     centile(10 30 50 70 90)

* Capture the cut points immediately: r() is overwritten by the next r-class
* command, and a matrix keeps them at full double precision.
tempname cuts
matrix `cuts' = (r(c_1), r(c_2), r(c_3), r(c_4), r(c_5))

* Four groups: 10-30, 30-50, 50-70, 70-90 (values outside 10-90 stay missing).
* inrange() is inclusive at both ends; assigning groups in ascending order
* means a value exactly on an interior cut point ends up in the higher group.
* (The variable keeps its historical "decile" name; it holds group codes 1-4.)
gen owner_fe_decile = .
forvalues q = 1/4 {
    local hi = `q' + 1
    replace owner_fe_decile = `q' if inrange(owner_fe, `cuts'[1,`q'], `cuts'[1,`hi'])
}

* Group of the NEXT row within (lnr, lfirm) in year order, so that the row
* before t=-1 carries the t=-1 group.
* NOTE(review): this is a row lead, not a calendar-year lead; it assumes
* consecutive yearly rows within each (lnr, lfirm) -- confirm there are no gaps.
bys lnr lfirm (year) : gen lead_fe_decile = owner_fe_decile[_n+1] // to get t=-1
    
* keep only the identifiers, covariates and outcomes used from here on
keep lnr lfirm year ///
    cohort_id year_first_child ///
    male age currently_own edlevel ///
    firm_age ind_cat* ///
    profit_before_self operating_profits lead_fe_decile

* Firm age used for matching, top-coded at 20.
* Stata treats missing as larger than any number, so "firm_age > 20" alone is
* also true when firm_age is missing and would silently assign age 20 to
* firms of unknown age. Guard against that so unknown ages stay missing.
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20 & !missing(firm_age)

* stack across cohorts and construct weights for the control firms
stack_weight_sample ind_cat2, ///
    agevar(firm_age_match) ///
    matchvar_m2(lead_fe_decile) ///
    min_event(-5) ///
    max_event(10) ///
    first_cohort(2002) ///
    last_cohort(2018) ///
    outcomes(profit_before_self)
    
* trim profits; the event study below uses the variable with the _trim suffix
trim_impute profit_before_self, suffix(_trim) ///
    year_lb(1996) year_ub(2018) ///
    centile_lb(0.5) centile_ub(99.5)

* five-year bins of owner age
generate age_bin = floor(age / 5)

* one indicator variable per category of each covariate (output suppressed)
local covs  firm_age_match ind_cat age_bin edlevel
local stubs dfirmage dind dage dedl
forvalues i = 1/4 {
    local v : word `i' of `covs'
    local s : word `i' of `stubs'
    quietly tabulate `v', generate(`s')
}

* Event study for every combination of male (0/1) and ability group (1-4);
* each run writes its own CSV under ${figpath}.
foreach g in 0 1 {
    foreach d in 1 2 3 4 {

        local outfile "${figpath}/profit_before_self_trim_quartile`d'_male`g'_ATT.csv"

        aggregate_event_study profit_before_self_trim ///
            if male == `g' & lead_fe_decile == `d', ///
            min_event(-5) max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            percentage store(temp) ///
            save("`outfile'")
    }
}

****************************************************************
****************************************************************
************** Robustness: repeat the analysis on the sample restricted with firm_age >= 3 (outputs suffixed _3p)
****************************************************************
****************************************************************

* Session setup for the robustness pass: empty the data in memory, raise the
* variable limit, move to the project root, and run the project's program files.
clear
set maxvar 30000

cd "$root"

* run each project .ado file so the programs it defines are available below
foreach src in stack_weight_panel trim_impute event_study {
    do "$root/code/admin/ado/`src'.ado"
}

* output folder for the CSVs written by the event-study loop
global figpath "$root/estimates/admin/fe_quartiles"

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final.dta, clear
* NOTE(review): missing firm_age also satisfies ">= 3" in Stata (missing sorts
* above every number), so rows with unknown firm age are retained here --
* confirm that is intended before tightening the condition.
keep if firm_age >= 3 // ROBUSTNESS RESTRICTION

* Production-function regression used to measure ability.
* Sample: years strictly before cohort_id, plus every year of units with
* cohort_id == 0. Slopes on log workers and log assets vary by ind_cat2.
gen log_rev = log(revenues)
gen log_workers = log(n_workers_total)
gen log_assets = log(total_assets)

reghdfe log_rev ind_cat2#c.log_workers ind_cat2#c.log_assets ///
    if (year < cohort_id | cohort_id == 0), ///
    a(ind_cat2#firm_age age ind_cat2#year) res
    * one change: removed ind_cat2#edlevel
predict resid, residuals

* Running mean of the residual within (lnr, lfirm), accumulated in year order.
* The (year) ordering is declared explicitly so the result does not depend on
* an earlier sort surviving the commands in between. sum() treats a missing
* residual as zero, so rows without a residual carry the last mean forward.
bys lnr lfirm (year) : gen resid_sum = sum(resid)
bys lnr lfirm (year) : gen resid_N = sum(resid != .)
gen owner_fe = resid_sum / resid_N

* Cut points: 10th/30th/50th/70th/90th centiles of owner_fe among rows with
* male == 0 observed at year == cohort_id - 1.
centile owner_fe if year == cohort_id - 1 & male == 0, ///
     centile(10 30 50 70 90)

* Capture the cut points immediately: r() is overwritten by the next r-class
* command, and a matrix keeps them at full double precision.
tempname cuts
matrix `cuts' = (r(c_1), r(c_2), r(c_3), r(c_4), r(c_5))

* Four groups: 10-30, 30-50, 50-70, 70-90 (values outside 10-90 stay missing).
* inrange() is inclusive at both ends; assigning groups in ascending order
* means a value exactly on an interior cut point ends up in the higher group.
* (The variable keeps its historical "decile" name; it holds group codes 1-4.)
gen owner_fe_decile = .
forvalues q = 1/4 {
    local hi = `q' + 1
    replace owner_fe_decile = `q' if inrange(owner_fe, `cuts'[1,`q'], `cuts'[1,`hi'])
}

* Group of the NEXT row within (lnr, lfirm) in year order, so that the row
* before t=-1 carries the t=-1 group.
* NOTE(review): this is a row lead, not a calendar-year lead; it assumes
* consecutive yearly rows within each (lnr, lfirm) -- confirm there are no gaps.
bys lnr lfirm (year) : gen lead_fe_decile = owner_fe_decile[_n+1] // to get t=-1
    
* keep only the identifiers, covariates and outcomes used from here on
keep lnr lfirm year ///
    cohort_id year_first_child ///
    male age currently_own edlevel ///
    firm_age ind_cat* ///
    profit_before_self operating_profits lead_fe_decile

* Firm age used for matching, top-coded at 20.
* Stata treats missing as larger than any number, so "firm_age > 20" alone is
* also true when firm_age is missing and would silently assign age 20 to
* firms of unknown age. Guard against that so unknown ages stay missing.
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20 & !missing(firm_age)

* stack across cohorts and construct weights for the control firms
stack_weight_sample ind_cat2, ///
    agevar(firm_age_match) ///
    matchvar_m2(lead_fe_decile) ///
    min_event(-5) ///
    max_event(10) ///
    first_cohort(2002) ///
    last_cohort(2018) ///
    outcomes(profit_before_self)
    
* trim profits; the event study below uses the variable with the _trim suffix
trim_impute profit_before_self, suffix(_trim) ///
    year_lb(1996) year_ub(2018) ///
    centile_lb(0.5) centile_ub(99.5)

* five-year bins of owner age
generate age_bin = floor(age / 5)

* one indicator variable per category of each covariate (output suppressed)
local covs  firm_age_match ind_cat age_bin edlevel
local stubs dfirmage dind dage dedl
forvalues i = 1/4 {
    local v : word `i' of `covs'
    local s : word `i' of `stubs'
    quietly tabulate `v', generate(`s')
}

* Event study for every combination of male (0/1) and ability group (1-4) on
* the restricted sample; each run writes its own _3p CSV under ${figpath}.
foreach g in 0 1 {
    foreach d in 1 2 3 4 {

        local outfile "${figpath}/profit_before_self_trim_quartile`d'_male`g'_ATT_3p.csv"

        aggregate_event_study profit_before_self_trim ///
            if male == `g' & lead_fe_decile == `d', ///
            min_event(-5) max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            percentage store(temp) ///
            save("`outfile'")
    }
}
